import pandas as pd
import os

def _filename_part(value):
    """Return *value* as text that is safe to embed in a single file name."""
    if pd.isna(value):
        # Missing keys get a readable placeholder rather than the text "nan".
        return 'unknown'
    text = str(value)
    # A path separator inside a key (e.g. issue "3/4") would otherwise be
    # treated by os.path.join as a sub-directory that does not exist.
    for separator in ('/', '\\'):
        text = text.replace(separator, '-')
    return text


def split_papers_by_volume_issue(input_file, output_dir='split_papers'):
    """Split a CSV of papers into one CSV file per (volume, issue) pair.

    The input is read with pandas, reduced to the columns ``volume``,
    ``issue``, ``title``, ``doi``, ``abstract`` and ``article_number``, and
    grouped by ``volume`` and ``issue``. Each group is written to
    ``<output_dir>/ieee_papers_<volume>_<issue>.csv`` (UTF-8, no index
    column). Rows whose ``volume`` or ``issue`` is missing are kept and
    grouped under the placeholder ``unknown`` instead of being discarded.

    Args:
        input_file: Path (or any source accepted by ``pandas.read_csv``) of
            the CSV file to split.
        output_dir: Directory that receives the generated files; it is
            created when it does not exist. Defaults to ``'split_papers'``
            relative to the current working directory.

    Returns:
        None. Progress is reported on standard output. Any exception raised
        while processing (missing file, missing column, write failure, ...)
        is caught and reported with a printed message instead of propagating.
    """
    print("正在读取文件...")

    try:
        # Read the CSV file.
        df = pd.read_csv(input_file)

        # Keep only the columns of interest, in this order.
        columns_to_keep = ['volume', 'issue', 'title', 'doi', 'abstract', 'article_number']
        df = df[columns_to_keep]

        # Group by volume and issue. dropna=False keeps rows with a missing
        # key; the pandas default would silently leave them out of every file.
        grouped = df.groupby(['volume', 'issue'], dropna=False)

        # Create the output directory (no error if it already exists).
        os.makedirs(output_dir, exist_ok=True)

        # ngroups also counts the groups whose key contains a missing value.
        total_files = grouped.ngroups
        for i, ((volume, issue), group_df) in enumerate(grouped, 1):
            file_name = f'ieee_papers_{_filename_part(volume)}_{_filename_part(issue)}.csv'
            output_file = os.path.join(output_dir, file_name)
            print(f"正在保存第 {i}/{total_files} 个文件: {output_file}")

            # Save as CSV without the index column.
            group_df.to_csv(output_file, index=False, encoding='utf-8')

        print(f"\n完成! 共拆分为 {total_files} 个文件")
        print(f"文件保存在: {os.path.abspath(output_dir)}")

    except Exception as e:
        # Best-effort script behaviour: report the problem and return.
        print(f"处理文件时出错: {str(e)}")

if __name__ == "__main__":
    # Script entry point: split the default export found in the working directory.
    split_papers_by_volume_issue("ieee_papers.csv")